/******************************************************************************
January 18, 2017
Sui-Jade Ho
Dimitrije Ruzic

Input(s): Manufacturing_1977_2009.dta (gzipped), Misallocation_Sample_vrts_*.dta, PF_FK_ACF.dta

This .do file has 1 section.
SECTION 1:
	- Imports the trimmed sample restriction
	- Constructs measures of productivity and distortions
	- Constructs and exports industry counterfactuals
	- Constructs aggregate counterfactual
	
******************************************************************************/

/* Start from a clean session: drop user programs, then data and all
   other in-memory objects. */
clear programs
clear all
set more off
set matsize 11000

/* Directory holding every input and output file; the trailing slash is
   required because file names are appended directly to it. */
local data "/FILE PATH GOES HERE/"

/* Run-time options are read from globals of the same name, which must be
   set before this do-file runs. alphaL, alphaK and sigma are later used as
   the right-hand side of generate statements. */
foreach opt in alphaL alphaK sigma fname pftype dtrim {
	local `opt' = "${`opt'}"
}

/* Data types for which every measure is built. */
local dtypes "ASM CMF"

/*SECTION 1: Constructing productivity and distortion measures*/
local S1 = 1

/*********************************************************
SECTION 1: 
	- Imports trimmed sample restriction
	- Constructs measures of productivity and distortion
	- Constructs and exports industry and aggregate counterfactuals
		
**********************************************************/
if `S1' == 1 {
/* Unpack the compressed input and load it. The archive is located through
   the data local, the same directory used for every other file, and the
   path is quoted so that directories containing spaces reach the shell as
   one argument. */
!gunzip "`data'Manufacturing_1977_2009.dta.gz"
use "`data'Manufacturing_1977_2009.dta", clear
*!gzip "`data'Manufacturing_1977_2009.dta"

/* Keep only the identifiers, output, inputs and weights listed here. */
keep et fk_naics02 year lbdnum PY2 Y2_* L K sw R wt
/* Restrict to years up to 2007. Missing years compare as larger than any
   number in Stata, so they are dropped as well. */
drop if year>2007

/*Merge in sample restriction
	dtrim == "none"  : no sample file; sample_ASM / sample_CMF are built
	                   from et and from the list of years below
	dtrim == "fixed" : always Misallocation_Sample_vrts_1acf_sest_5y
	dtrim == "own"   : the sample file selected by pftype
  The merged files are presumably the source of sample_ASM and sample_CMF,
  which the rest of the script requires; confirm against the files. */
local samplefile ""
if "`dtrim'" == "fixed" {
	local samplefile "Misallocation_Sample_vrts_1acf_sest_5y"
}
else if "`dtrim'" == "own" {
	if "`pftype'" == "5y" {
		local samplefile "Misallocation_Sample_vrts_1acf_sest_5y"
	}
	else if "`pftype'" == "5y_2" {
		local samplefile "Misallocation_Sample_vrts_2acf_sest_5y"
	}
	else if "`pftype'" == "10y" {
		local samplefile "Misallocation_Sample_vrts_1acf_sest_10y"
	}
	else if "`pftype'" == "base" {
		local samplefile "Misallocation_Sample_vrts_1acf_sest_base"
	}
}

if "`dtrim'" == "none" {
	gen sample_ASM = 1 if et==0
	gen sample_CMF = 1 if inlist(year, 1977, 1982, 1987, 1992, 1997, 2002, 2007)
}
else if "`samplefile'" != "" {
	merge 1:1 lbdnum year fk_naics02 using "`data'`samplefile'"
	drop _merge
}
else {
	/* Without a sample definition the script would only fail much later
	   with a "variable not found" error; stop here with a clear message. */
	display as error "unsupported option combination: dtrim=`dtrim', pftype=`pftype'"
	exit 198
}

	/* A weight of zero is replaced by one. */
	replace wt = 1 if wt == 0


/* period: five-year bins ending in 1982, 1987, 1992, 1997, 2002 and 2007.
   Years after 2007 (and missing years) are left missing. */
gen period = cond(year<=1982, 1, ///
	cond(year<=1987, 2, ///
	cond(year<=1992, 3, ///
	cond(year<=1997, 4, ///
	cond(year<=2002, 5, ///
	cond(year<=2007, 6, .))))))


/* dec: ten-year bins ending in 1987, 1997 and 2007, missing otherwise. */
gen dec = cond(year<=1987, 1, ///
	cond(year<=1997, 2, ///
	cond(year<=2007, 3, .)))

/*Merge in production-function parameters.
  Rows that exist only in the parameter file are discarded and no merge
  indicator is kept.*/
*merge m:1 fk year using "`data'PF_FK_All.dta"
merge m:1 fk period dec using "`data'PF_FK_ACF.dta", keep(master match) nogenerate

/* The elasticities and sigma come from the caller-supplied expressions. */
gen alphaL = `alphaL'
gen alphaK = `alphaK'
gen sigma = `sigma'
*gen Y2 = Y2_sigma`sigmaY'
/* Output measure implied by sigma: PY2 raised to sigma/(sigma-1). */
gen double Y2 = PY2^(sigma/(sigma-1))

/* The merged parameter columns and the merge keys are no longer needed. */
drop period dec *acf_* *_hk_* sigma_*

/*Dealing with potentially censored observations: a row needs both
  elasticities*/
drop if alphaL==. | alphaK==.

/*Dealing with industries where imposing sigma leads the markup to exceed RTS*/
*drop if alphaL+alphaK>sigma/(sigma-1)

/*Calculate physical productivity, TFPQ = Y2/(L^alphaL * K^alphaK).
  The same value is stored once per data type and is non-missing only for
  observations inside that data type's sample.*/
tempvar tfpq_all
gen double `tfpq_all' = Y2/((L^alphaL)*(K^alphaK))
foreach d in ASM CMF {
	gen double tfpq_`d' = `tfpq_all' if sample_`d'==1
}
drop `tfpq_all'

/*Calculate relative distortions.
  For each data type and each input (K, L):
	1. py_weight = an observation's share of PY2 within industry-year
	   (PY2 is multiplied by wt for ASM, used as is for CMF);
	2. the input-to-PY2 ratio is averaged within industry-year using
	   those shares;
	3. tau = that average divided by the observation's own ratio.*/
local dtypes "ASM CMF"
sort fk_naics02 year
foreach d of local dtypes {
	/* Only ASM observations carry a sampling weight. */
	local w ""
	if "`d'"=="ASM" {
		local w "*wt"
	}

	tempvar va va_total py_weight
	gen `va' = PY2`w' if sample_`d'==1
	by fk_naics02 year: egen `va_total' = total(`va') if sample_`d'==1
	gen `py_weight' = (`va')/`va_total'
	drop `va' `va_total'

	foreach f in K L {
		local lf = lower("`f'")
		tempvar ratio wratio avg
		gen `ratio' = (`f'/(PY2)) if sample_`d'==1
		gen `wratio' = `ratio'*`py_weight'
		by fk_naics02 year: egen `avg' = total(`wratio')
		gen tau`lf'_`d' = `avg'/`ratio'
		drop `avg' `ratio' `wratio'
	}

	drop `py_weight'
}
	
/*Calculate TFPR Ratio and Counterfactual TFPR Ratio 
TFPR_bar/TFPR = (PY_i/PY_ie)^(1-beta) * (MRPK_bar/MRPK)^alphaK * (MRPL_bar/MRPL)^alphaL
In the code below the exponent written as (1-beta) is (1-alphaL-alphaK).
All intermediates are stored as double so that no step is rounded to
float precision before it enters a double-precision result. */
local dtypes "ASM CMF"
sort fk_naics02 year
foreach d of local dtypes {
	/*Part 1: PY Ratio:
		Note: whenever we want to construct a weighted average, we need 
		to account for sampling bias. Hence, a weight is
		(wt*py)/(sum of wt*py)
		However, when we want to calculate the TFPR ratio, we need to 
		know the size of the firm in the industry, and the total industry
		size has to be adjusted for sampling bias, but we don't want to
		allow the weight to affect firm size. Hence, 
		tfpr1 = (sum(wt*py)/py)^(1-beta)
		
		and when we sum the A*TFPR terms, we do wt*(A*TFPR)^(sigma-1)
		to ensure that we still have a representative industry*/
	if "`d'"=="ASM" {
		gen double py_weighted = PY2*wt if sample_`d'==1
		}
	if "`d'"=="CMF" {
		gen double py_weighted = PY2 if sample_`d'==1
		}
	by fk_naics02 year: egen double py_weighted_sum = total(py_weighted) if sample_`d'==1
		
	gen double tfpr1 = (py_weighted_sum/PY2)^(1-alphaL-alphaK) if sample_`d'==1
	
	/*Part 2: MRPK Ratio*/
	gen double tfpr2 = (tauk_`d')^(-alphaK)
	
	/*Part 3: MRPL Ratio*/
	gen double tfpr3 = (taul_`d')^(-alphaL)
	
	gen double tfpr_ratio_`d' = tfpr1*tfpr2*tfpr3
	
	drop tfpr1 tfpr2 tfpr3 py_weighted py_weighted_sum

	/*Counterfactual TFPR Ratio
		TFPQ is first divided by its industry-year mean; the scaled value
		is raised to the power below, summed within industry-year
		(multiplied by wt for ASM), and the sum is divided by the
		observation's own term. The grouping key is spelled out in full
		so the step does not depend on variable abbreviation.*/
	bys fk_naics02 year: egen double tempmean = mean(tfpq_`d')
	gen double tempnorm = tfpq_`d'/tempmean
	gen double temp = tempnorm^((sigma-1)/(alphaK+alphaL+sigma*(1-alphaK-alphaL)))
	if "`d'"=="ASM" {
		by fk_naics02 year: egen double temp2 = total(temp*wt)
		}
	if "`d'"=="CMF" {
		by fk_naics02 year: egen double temp2 = total(temp)
		}
		
	gen double tfpr2_ratio_cf_`d' = (temp2/temp)^(1-alphaK-alphaL)
		drop temp temp2 tempmean tempnorm
	
	}

/*Industry TFP (TFP2_*) and Counterfactual Industry TFP (TFP3_cf_*).
  Both use the same aggregation and differ only in the TFPR ratio that
  multiplies TFPQ:
	TFP2_*    uses tfpr_ratio_*
	TFP3_cf_* uses tfpr2_ratio_cf_*
  Within each industry-year the product is divided by its mean, raised to
  (sigma-1), summed (multiplied by wt for ASM), raised to 1/(sigma-1) and
  multiplied by the mean again.*/
sort fk_naics02 year

foreach spec in "TFP2 tfpr_ratio" "TFP3_cf tfpr2_ratio_cf" {
	local out : word 1 of `spec'
	local ratio : word 2 of `spec'

	foreach d in ASM CMF {
		/* Only ASM observations carry a sampling weight. */
		local w ""
		if "`d'"=="ASM" {
			local w "wt*"
		}

		tempvar prod avg rel term agg
		gen double `prod' = (tfpq_`d'*`ratio'_`d') if sample_`d'==1
		by fk_naics02 year: egen double `avg' = mean(`prod')
		gen double `rel' = `prod'/`avg'
		gen double `term' = `w'`rel'^(sigma-1) if sample_`d'==1
		by fk_naics02 year: egen double `agg' = total(`term')
		gen double `out'_`d' = `agg'^(1/(sigma-1))*`avg'
		drop `prod' `term' `agg' `avg' `rel'
	}
}

/*Industry Misallocation: counterfactual industry TFP over actual
  industry TFP*/
foreach d in ASM CMF {
	gen double M3_i_`d' = TFP3_cf_`d'/TFP2_`d'
}

/*Flagging Industry Observations when RTS>markup: flag is 1 when
  (alphaL+alphaK) divided by sigma/(sigma-1) exceeds 0.995*/
gen flag = 1 if (alphaL+alphaK)/(sigma/(sigma-1))>0.995

/*Keeping and cleaning the third version of the calculation: M_i_* is a
  copy of M3_i_* that is missing for flagged observations*/
foreach d in ASM CMF {
	gen double M_i_`d' = M3_i_`d' if flag!=1
}

/*Aggregate Misallocation*/
/*Industry Value-Added: sum of PY2 within industry-year over in-sample,
  non-flagged observations (PY2 is multiplied by wt for ASM, used as is
  for CMF). The sum is stored on in-sample rows only.*/
foreach d in ASM CMF {
	local w ""
	if "`d'"=="ASM" {
		local w "*wt"
	}

	tempvar va
	gen `va' = PY2`w' if sample_`d'==1 & flag!=1
	by fk_naics02 year: egen py_weighted_sum_`d' = total(`va') if sample_`d'==1
	drop `va'
}

/* Reduce to one row per industry-year by averaging each listed variable. */
collapse (mean) M3_* M_* py_weighted* alphaL alphaK sigma flag, by(fk_naics02 year)

/* Aggregate misallocation by year and data type: the value-added-share
   weighted geometric mean of industry misallocation,
	M_d = exp( sum_i sh_py_d * log(M_i_d) ). */
local dtypes "ASM CMF"
foreach d of local dtypes {
	/* Industry share of the year's total value added. The missing option
	   makes the total missing, rather than 0, for a year in which no
	   industry has a value for this data type. */
	bys year: egen double py_total = total(py_weighted_sum_`d'), missing
	gen double sh_py_`d' = py_weighted_sum_`d'/py_total
	drop py_total
	
	/* Without the missing option a year with no usable industry would sum
	   to 0 and report an aggregate of exp(0) = 1; with it the aggregate
	   is missing for such a year. */
	gen double temp = sh_py_`d'*log(M_i_`d')
	by year: egen double temp2 = total(temp), missing
	gen double M_`d'=exp(temp2)
	drop temp temp2
	}
	
/* Keep the identifiers, every misallocation measure and the parameters,
   then tag each kept variable other than the identifiers with the suffix
   _CM_ followed by the run name. */
keep year fk_naics02 M* alphaL alphaK sigma flag
unab outvars : M* alphaL alphaK sigma flag
foreach v of local outvars {
	rename `v' `v'_CM_`fname'
}

/* Write the industry-year results for this run. */
save "`data'Misallocation_Industry_CM_`fname'.dta", replace

}






